Setup

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Importing Full Dataset

# Load the full Met objects export and normalise the column headers to
# snake_case so later chunks can refer to them without backticks.
# NOTE(review): readr reports parsing issues for this file; run problems()
# on the raw read_csv() result to see which columns are affected.
all_objects <- janitor::clean_names(read_csv("data/MetObjects.csv"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 484956 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (45): Object Number, Gallery Number, Department, Object Name, Title, Cul...
## dbl  (5): Object ID, AccessionYear, Constituent ID, Object Begin Date, Objec...
## lgl  (4): Is Highlight, Is Timeline Work, Is Public Domain, Metadata Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Most Common Object Classifications

Prepping Data for Analysis

  • Filtering to only include objects with a classification value, then trying to group objects into specific types.
  • This needs to be cleaned up further, as there are still semi-duplicate categories.
# Objects that carry a usable classification, with each classification
# collapsed to its leading term (the text before the first "-" or "|").
with_classification <-
  all_objects |>
  mutate(
    # Cut at the first "-" or "|", then trim whitespace the cut may leave
    # behind so that "Foo " and "Foo" are not counted as separate groups.
    classification = str_trim(str_remove(classification, "[-|].*"))
  ) |>
  # Filter AFTER truncating: a raw value that is the placeholder followed by
  # a "-" or "|" suffix only becomes the bare "(not assigned)" once the
  # suffix is removed, so filtering first lets it slip into the results.
  filter(
    !is.na(classification),
    classification != "",
    classification != "(not assigned)"
  )

# Tabulate how many objects fall under each cleaned classification and
# render the result as a markdown table.
with_classification |>
  count(classification, name = "classification_n") |>
  knitr::kable()
classification classification_n
(not assigned) 1
Accessory 294
Aerophone 1982
Aerophone and Idiophone 15
Albums 1418
Amber 33
Archery Equipment 927
Architectural Models 8
Archives 77
Armor 12
Armor Parts 1068
Armor for Child 5
Armor for Horse 36
Armor for Horse and Man 15
Armor for Man 201
Arms and Armor 23
Assemblages 5
Bamboo 122
Banners 75
Bark 388
Barkcloth 72
Basketry 298
Beads 244
Bindings 62
Blocks 254
Bone 242
Bone/Ivory 373
Books 20808
Books & Manuscripts 19
Books and book bindings 9
Brigandines 8
Bronzes 1824
Calligraphy 352
Celluloid 6
Ceramics 23126
Cesnola Inscriptions 187
Chess Sets 328
Chordophone 1324
Chordophone and Aerophone 3
Clay 831
Cloisonné 181
Coat of mail and plate 8
Codices 1309
Coins 1088
Collages 69
Combination Weapons 38
Costume Accessories 76
Costumes 609
Cricket cages 51
Crèche 265
Cut Paper 1069
Cylinder seals 1
Daggers 826
Decorative arts 1
Documents 6
Drawings 28313
Electrophone 1
Enamels 1011
Ephemera 1840
Equestrian Equipment 758
Faience 107
Fans 561
Feathers 52
Fencing Equipment 23
Firearms 619
Firearms Accessories 442
Firearms Parts 29
Forgeries 27
Frames 387
Furniture 394
Gaming pieces 119
Gems 1607
Gesso 1
Glass 8365
Gold 2
Gold and Silver 1957
Gourd 45
Hardstone 254
Helmet Crests 52
Helmets 617
Helmets Parts 22
Hide 56
Horn 29
Horology 947
Idiophone 1143
Illustrated Books 718
Ink 98
Inkstone 18
Inrō 539
Installations 47
Ivories 455
Ivories and Bone 364
Ivory 1
Ivory/Bone 396
Jade 1664
Jewelry 2344
Jewelry, Precious Metals and Precious Stones 48
Knives 263
Kris Stand 4
Krisses 199
Lacquer 843
Lamps 2
Lapidary Work 240
Leather 56
Leatherwork 84
Letters 23
Lighting 62
Machines & Appliances 18
Mail 175
Main dress 475
Manuscript Materials 215
Manuscripts 91
Manuscripts and Illuminations 303
Masks 23
Medals 133
Medals and Plaquettes 2068
Membranophone 457
Metal 3241
Metalwork 14218
Miniatures 255
Mirrors 152
Miscellaneous 2331
Miscellany 16
Models 48
Mosaics 7
Musical Instruments 6
Musical instruments 43
Natural Substances 281
Negatives 6156
Netsuke 942
Night and Dressing Wear 1
Ojime 150
Ornament & Architecture 493
Outerwear 74
Painted Canvases 4
Paintings 9185
Paper 199
Papier Mâché 9
Papyrus 198
Parchment 10
Parts 3
Pastels & Oil Sketches on Paper 20
Periodicals 172
Photographic Equipment 14
Photographs 38777
Plaquettes 38
Plaster 23
Plaster cast 1
Plastic 26
Plates 129
Portfolio Covers 4
Portfolios 436
Portraits 3
Postage Stamps 6
Postcards 1
Posters 224
Printed matter 17
Prints 120942
Reproductions 260
Rubbing 450
Saddle Plates 8
Scientific Instruments 11
Screens 7
Sculpture 6036
Seals 271
Shafted Weapons 954
Sharkskin 1
Shell 225
Shields 193
Silver 20
Sketchbooks 11
Smoking Equipment 14
Snuff Bottles 629
Snuffboxes 17
Soapstone 8
Sound Recordings 2
Steatites 11
Stencils 569
Stone 3343
Stone Sculpture 1009
Stucco 1810
Surcoat 18
Sword Blades 78
Sword Fittings 6
Sword Furniture 3157
Swords 1297
Terracottas 1771
Textiles 32013
Tobacco Pouches 14
Tomb Pottery 285
Tools 17
Transparencies 222
Underwear 2
Variable Media 282
Vases 21344
Wallpaper 214
Wax 2
Wood 3176
Woodblocks 71
Woodwork 2327
Works on Paper 255
boxes 4
games or toys 2
modern (casts, models, display material, impressions) 2
relief [see also architecture, stelae, palettes] 1
textiles: sheets, shrouds, bandages [see clothing, painting] 1

Plotting the Cumulative Sum of Objects Acquired Over Time, by Classification

  • For readability, I limited the plot to only include classifications with more than 3000 total acquisitions by 2023. Without doing this, there are a ton of less common categories hanging out at the very bottom and cluttering up the plot.
  • This could be improved by ordering the legend entries by each classification's popularity in 2023.
# Running total of objects acquired per classification, one row per
# (classification, accession_year) pair. The result stays grouped by
# classification and is ordered from the largest running total downward.
cumulative_classification <-
  with_classification |>
  # count() returns an ungrouped tibble, so there is no implicit leftover
  # grouping (and no summarise() grouping message) to reason about.
  count(classification, accession_year, name = "count") |>
  # Make the order cumsum() depends on explicit: years ascending within each
  # classification. Missing years sort last, so they never inflate the
  # running total of a dated row.
  arrange(classification, accession_year) |>
  group_by(classification) |>
  mutate(
    cumulative_count = cumsum(count)
  ) |>
  arrange(desc(cumulative_count))
## `summarise()` has grouped output by 'classification'. You can override using
## the `.groups` argument.
# For every classification, take its row with the most recent accession year
# (missing years sort last) and keep it only when the running total at that
# point exceeds 3000.
most_acquired_classifications <-
  cumulative_classification |>
  arrange(desc(accession_year)) |>
  group_by(classification) |>
  filter(
    row_number() == 1,
    cumulative_count > 3000
  )


# Interactive line chart of cumulative acquisitions over time, restricted to
# the classifications listed in most_acquired_classifications.
cumulative_classification |>
  filter(
    classification %in% most_acquired_classifications$classification,
    # Rows without an accession year cannot be placed on the x axis; drop
    # them explicitly rather than leaving plotly to discard them (presumably
    # the source of the "Ignoring N observations" warning).
    !is.na(accession_year)
  ) |>
  plot_ly(
    x = ~accession_year, y = ~cumulative_count,
    # "lines" is the documented scatter mode flag; "line" is not one.
    type = "scatter", mode = "lines",
    color = ~classification, colors = "viridis"
  ) |>
  layout(
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Acquired")
  )
## Warning: Ignoring 14 observations